Setup:
#Load required packages
#library() is used instead of require() so that a missing package stops the script immediately instead of failing later at the first call that needs it
library(ggplot2)
library(grid)
library(plyr)
#Load diamonds and mtcars dataframes
data(diamonds)
data(mtcars)
#Add logical column to diamonds dataframe
#The variable pctTrue represents the percentage of TRUE entries in the new logical column of the user's dataframe; we use 0.4375 as per Professor G's posted interpretation of the logical column
pctTrue <- 0.4375
#Here we generate a vector of random values between 0 and 1, generating a random value for each row in the user's dataframe
randVec <- runif(nrow(diamonds), 0.0, 1.0)
#Each entry of logicVec is TRUE when the corresponding random value is less than or equal to pctTrue and FALSE otherwise
#The comparison is vectorized, so no loop (and no element-by-element growing of logicVec) is needed
logicVec <- randVec <= pctTrue
#We add logicVec to diamonds
diamonds$logical <- logicVec
Explore Function:
This function takes a dataframe, a vector of histogram bin sizes, and a correlation threshold from the user and: 1) Plots count and density histograms for each of the numeric variables in the user’s dataframe, first with a default bin size and then with each of the bin sizes specified in the user’s vector. 2) Plots bar graphs for each of the categorical (logical and factor) variables in the user’s dataframe. 3) Returns a list of the following: a: Frequency tables for all categorical variables in the user’s dataframe b: Statistical summary tables for all numeric variables in the user’s dataframe c: A table containing the R-square value for each pair of numeric variables in the user’s dataframe d: A table containing the correlation coefficient for each pair of numeric variables in the user’s dataframe. Pairs are only included if the absolute value of their correlation coefficient is greater than the threshold input by the user
Parameters: - dataFrame: the dataframe input by the user - binSizes: a vector of histogram bin sizes input by the user - corrVal: the correlation threshold input by the user, as described above
Return: - returnList: an R list object containing all of the plots and tables described above
#Helper: builds the two histograms (count, then density) of one numeric column
#Both plots are filled in blue and carry a red vertical line at the column mean
#Any extra arguments (e.g. binwidth) are forwarded to geom_histogram; with none, ggplot2 picks its default binning
.exploreHistogramPair <- function(numFrame, colName, colMean, ...) {
  list(
    ggplot(numFrame, aes_string(colName)) +
      geom_histogram(..., fill = "blue") +
      geom_vline(xintercept = colMean, color = "red"),
    ggplot(numFrame, aes_string(colName)) +
      geom_histogram(aes(y = ..density..), ..., fill = "blue") +
      geom_vline(xintercept = colMean, color = "red")
  )
}

#Helper: computes the Pearson correlation of every non-repeating pair of columns of numFrame
#Returns a dataframe with a "Columns" label of the form 'col1-col2' and a "Correlation" value per pair
#Pairs are ordered (1,2), (1,3), ..., (2,3), ...; fewer than two columns gives a zero-row dataframe
.explorePairCorrelations <- function(numFrame) {
  nCols <- ncol(numFrame)
  if (nCols < 2L) {
    return(data.frame(Columns = character(0), Correlation = numeric(0),
                      stringsAsFactors = FALSE))
  }
  #combn enumerates each unordered pair exactly once, so no column is paired with itself
  pairIdx <- utils::combn(nCols, 2L)
  first <- pairIdx[1L, ]
  second <- pairIdx[2L, ]
  colNames <- colnames(numFrame)
  #[[ ]] extracts a plain vector for both data.frames and tibbles
  corrValue <- vapply(
    seq_len(ncol(pairIdx)),
    function(p) cor(numFrame[[first[p]]], numFrame[[second[p]]], method = "pearson"),
    numeric(1)
  )
  data.frame(
    Columns = paste0(colNames[first], "-", colNames[second]),
    Correlation = corrValue,
    stringsAsFactors = FALSE
  )
}

#' Explore a dataframe with plots and summary tables
#'
#' 1) Builds count and density histograms for every numeric column, first with
#'    ggplot2's default binning and then once per entry of binSizes.
#' 2) Builds a bar graph for every categorical (logical or factor) column.
#' 3) Builds frequency tables for the categorical columns, a statistical summary
#'    of the numeric columns, a table of R-square values for every pair of numeric
#'    columns, and a table of the Pearson correlations whose absolute value is
#'    greater than corrVal.
#'
#' @param dataFrame the dataframe input by the user
#' @param binSizes a vector of histogram bin sizes; may be empty, in which case
#'   only the default histograms are produced
#' @param corrVal the correlation threshold; a pair is kept only if
#'   abs(correlation) > corrVal
#' @return an unnamed list of three elements: the list of histograms, the list
#'   of bar graphs, and the list of tables (frequency tables, numeric summary if
#'   there are numeric columns, R-square table, thresholded correlation table)
Explore <- function(dataFrame, binSizes, corrVal) {
  stopifnot(is.data.frame(dataFrame))

  ##############################################################################
  ### Question 1 ###
  ##############################################################################
  #We first isolate the numeric columns in the user's dataframe
  #vapply always returns a logical vector, even for a dataframe with no columns
  numFrame <- dataFrame[vapply(dataFrame, is.numeric, logical(1))]
  #Each numeric column yields one default pair of plots plus one pair per bin size, so numPlots can be allocated up front
  plotsPerColumn <- 2L * (1L + length(binSizes))
  numPlots <- vector("list", ncol(numFrame) * plotsPerColumn)
  #k counts the plots stored so far
  k <- 0L
  for (i in seq_len(ncol(numFrame))) {
    colName <- colnames(numFrame)[i]
    #Missing values are ignored so the mean line is still drawn for columns containing NA
    colMean <- mean(numFrame[[i]], na.rm = TRUE)
    #Default plots come first, as the bin sizes input by the user may all be unreasonably small or large for some variables, depending on their range
    numPlots[k + 1:2] <- .exploreHistogramPair(numFrame, colName, colMean)
    k <- k + 2L
    #Then one count/density pair for each bin size input by the user
    for (binWidth in binSizes) {
      numPlots[k + 1:2] <- .exploreHistogramPair(numFrame, colName, colMean,
                                                 binwidth = binWidth)
      k <- k + 2L
    }
  }

  ##############################################################################
  ### Question 2 ###
  ##############################################################################
  #We isolate the logical and factor columns and combine them (logical columns first) into one dataframe of categorical variables
  logFrame <- dataFrame[vapply(dataFrame, is.logical, logical(1))]
  facFrame <- dataFrame[vapply(dataFrame, is.factor, logical(1))]
  catFrame <- data.frame(logFrame, facFrame)
  #One grey bar plot per categorical column; an empty list if there are none
  catPlots <- lapply(colnames(catFrame), function(xLabel) {
    ggplot(catFrame, aes_string(xLabel)) + geom_bar(fill = "grey")
  })

  ##############################################################################
  ### Question 3 ###
  ##############################################################################
  #Every pairwise correlation is computed exactly once and reused for both tables below
  pairCorr <- .explorePairCorrelations(numFrame)
  rSquareFrame <- data.frame(
    Columns = pairCorr$Columns,
    "R-Square" = pairCorr$Correlation^2,
    check.names = FALSE,
    stringsAsFactors = FALSE
  )

  ##############################################################################
  ### Question 4 ###
  ##############################################################################
  #Frequency tables for each categorical column, built with count from the plyr package (which returns dataframes)
  #The call is namespace-qualified so that another attached package defining count cannot mask it
  summaryTables <- lapply(colnames(catFrame), function(varName) {
    plyr::count(catFrame, varName)
  })
  #Statistical summaries for all numeric columns are added at once as a single table
  if (ncol(numFrame) > 0) {
    summaryTables <- c(summaryTables, list(summary(numFrame)))
  }
  #We keep only the pairs whose correlation is greater in absolute value than our threshold
  #Pairs with an undefined (NA) correlation, e.g. involving a constant column, are left out instead of raising an error
  isHigh <- !is.na(pairCorr$Correlation) & abs(pairCorr$Correlation) > corrVal
  highCorrFrame <- pairCorr[isHigh, , drop = FALSE]
  #Row names are reset so the table is numbered 1, 2, ... rather than by original pair index
  rownames(highCorrFrame) <- NULL
  summaryTables <- c(summaryTables, list(rSquareFrame, highCorrFrame))

  #The histograms requested in Question 1 are stored in numPlots, the bar graphs requested in Question 2 are stored in catPlots, and all of the tables requested by Questions 3 and 4 are stored in summaryTables
  list(numPlots, catPlots, summaryTables)
}
#Tests the function on diamonds (which has numeric, factor, and the added logical columns), using bin sizes 5, 20 and 50 and a correlation threshold of 0.25
Explore(diamonds,c(5,20,50),0.25)
## [[1]]
## [[1]][[1]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[2]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[3]]
##
## [[1]][[4]]
##
## [[1]][[5]]
##
## [[1]][[6]]
##
## [[1]][[7]]
##
## [[1]][[8]]
##
## [[1]][[9]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[10]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[11]]
##
## [[1]][[12]]
##
## [[1]][[13]]
##
## [[1]][[14]]
##
## [[1]][[15]]
##
## [[1]][[16]]
##
## [[1]][[17]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[18]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[19]]
##
## [[1]][[20]]
##
## [[1]][[21]]
##
## [[1]][[22]]
##
## [[1]][[23]]
##
## [[1]][[24]]
##
## [[1]][[25]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[26]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[27]]
##
## [[1]][[28]]
##
## [[1]][[29]]
##
## [[1]][[30]]
##
## [[1]][[31]]
##
## [[1]][[32]]
##
## [[1]][[33]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[34]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[35]]
##
## [[1]][[36]]
##
## [[1]][[37]]
##
## [[1]][[38]]
##
## [[1]][[39]]
##
## [[1]][[40]]
##
## [[1]][[41]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[42]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[43]]
##
## [[1]][[44]]
##
## [[1]][[45]]
##
## [[1]][[46]]
##
## [[1]][[47]]
##
## [[1]][[48]]
##
## [[1]][[49]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[50]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[51]]
##
## [[1]][[52]]
##
## [[1]][[53]]
##
## [[1]][[54]]
##
## [[1]][[55]]
##
## [[1]][[56]]
##
##
## [[2]]
## [[2]][[1]]
##
## [[2]][[2]]
##
## [[2]][[3]]
##
## [[2]][[4]]
##
##
## [[3]]
## [[3]][[1]]
## logical freq
## 1 FALSE 30438
## 2 TRUE 23502
##
## [[3]][[2]]
## cut freq
## 1 Fair 1610
## 2 Good 4906
## 3 Very Good 12082
## 4 Premium 13791
## 5 Ideal 21551
##
## [[3]][[3]]
## color freq
## 1 D 6775
## 2 E 9797
## 3 F 9542
## 4 G 11292
## 5 H 8304
## 6 I 5422
## 7 J 2808
##
## [[3]][[4]]
## clarity freq
## 1 I1 741
## 2 SI2 9194
## 3 SI1 13065
## 4 VS2 12258
## 5 VS1 8171
## 6 VVS2 5066
## 7 VVS1 3655
## 8 IF 1790
##
## [[3]][[5]]
## carat depth table price
## Min. :0.2000 Min. :43.00 Min. :43.00 Min. : 326
## 1st Qu.:0.4000 1st Qu.:61.00 1st Qu.:56.00 1st Qu.: 950
## Median :0.7000 Median :61.80 Median :57.00 Median : 2401
## Mean :0.7979 Mean :61.75 Mean :57.46 Mean : 3933
## 3rd Qu.:1.0400 3rd Qu.:62.50 3rd Qu.:59.00 3rd Qu.: 5324
## Max. :5.0100 Max. :79.00 Max. :95.00 Max. :18823
## x y z
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 4.710 1st Qu.: 4.720 1st Qu.: 2.910
## Median : 5.700 Median : 5.710 Median : 3.530
## Mean : 5.731 Mean : 5.735 Mean : 3.539
## 3rd Qu.: 6.540 3rd Qu.: 6.540 3rd Qu.: 4.040
## Max. :10.740 Max. :58.900 Max. :31.800
##
## [[3]][[6]]
## Columns R-Square
## 1 carat-depth 0.0007966119
## 2 carat-table 0.0329849332
## 3 carat-price 0.8493305264
## 4 carat-x 0.9508087510
## 5 carat-y 0.9057751441
## 6 carat-z 0.9089474974
## 7 depth-table 0.0874849338
## 8 depth-price 0.0001133672
## 9 depth-x 0.0006395460
## 10 depth-y 0.0008608750
## 11 depth-z 0.0090105434
## 12 table-price 0.0161630291
## 13 table-x 0.0381593881
## 14 table-y 0.0337677917
## 15 table-z 0.0227794699
## 16 price-x 0.7822255540
## 17 price-y 0.7489533305
## 18 price-z 0.7417506045
## 19 x-y 0.9500429745
## 20 x-z 0.9423978849
## 21 y-z 0.9063148836
##
## [[3]][[7]]
## Columns Correlation
## 1 carat-price 0.9215913
## 2 carat-x 0.9750942
## 3 carat-y 0.9517222
## 4 carat-z 0.9533874
## 5 depth-table -0.2957785
## 6 price-x 0.8844352
## 7 price-y 0.8654209
## 8 price-z 0.8612494
## 9 x-y 0.9747015
## 10 x-z 0.9707718
## 11 y-z 0.9520057
#Tests the function on mtcars, where every column is numeric, so no bar graphs or frequency tables are produced; same bin sizes and threshold as above
Explore(mtcars,c(5,20,50),0.25)
## [[1]]
## [[1]][[1]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[2]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[3]]
##
## [[1]][[4]]
##
## [[1]][[5]]
##
## [[1]][[6]]
##
## [[1]][[7]]
##
## [[1]][[8]]
##
## [[1]][[9]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[10]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[11]]
##
## [[1]][[12]]
##
## [[1]][[13]]
##
## [[1]][[14]]
##
## [[1]][[15]]
##
## [[1]][[16]]
##
## [[1]][[17]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[18]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[19]]
##
## [[1]][[20]]
##
## [[1]][[21]]
##
## [[1]][[22]]
##
## [[1]][[23]]
##
## [[1]][[24]]
##
## [[1]][[25]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[26]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[27]]
##
## [[1]][[28]]
##
## [[1]][[29]]
##
## [[1]][[30]]
##
## [[1]][[31]]
##
## [[1]][[32]]
##
## [[1]][[33]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[34]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[35]]
##
## [[1]][[36]]
##
## [[1]][[37]]
##
## [[1]][[38]]
##
## [[1]][[39]]
##
## [[1]][[40]]
##
## [[1]][[41]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[42]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[43]]
##
## [[1]][[44]]
##
## [[1]][[45]]
##
## [[1]][[46]]
##
## [[1]][[47]]
##
## [[1]][[48]]
##
## [[1]][[49]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[50]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[51]]
##
## [[1]][[52]]
##
## [[1]][[53]]
##
## [[1]][[54]]
##
## [[1]][[55]]
##
## [[1]][[56]]
##
## [[1]][[57]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[58]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[59]]
##
## [[1]][[60]]
##
## [[1]][[61]]
##
## [[1]][[62]]
##
## [[1]][[63]]
##
## [[1]][[64]]
##
## [[1]][[65]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[66]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[67]]
##
## [[1]][[68]]
##
## [[1]][[69]]
##
## [[1]][[70]]
##
## [[1]][[71]]
##
## [[1]][[72]]
##
## [[1]][[73]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[74]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[75]]
##
## [[1]][[76]]
##
## [[1]][[77]]
##
## [[1]][[78]]
##
## [[1]][[79]]
##
## [[1]][[80]]
##
## [[1]][[81]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[82]]
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
##
## [[1]][[83]]
##
## [[1]][[84]]
##
## [[1]][[85]]
##
## [[1]][[86]]
##
## [[1]][[87]]
##
## [[1]][[88]]
##
##
## [[2]]
## list()
##
## [[3]]
## [[3]][[1]]
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
##
## [[3]][[2]]
## Columns R-Square
## 1 mpg-cyl 0.726180005
## 2 mpg-disp 0.718343340
## 3 mpg-hp 0.602437341
## 4 mpg-drat 0.463995168
## 5 mpg-wt 0.752832794
## 6 mpg-qsec 0.175296320
## 7 mpg-vs 0.440947686
## 8 mpg-am 0.359798943
## 9 mpg-gear 0.230673448
## 10 mpg-carb 0.303518437
## 11 cyl-disp 0.813663302
## 12 cyl-hp 0.692968762
## 13 cyl-drat 0.489913363
## 14 cyl-wt 0.612299668
## 15 cyl-qsec 0.349567190
## 16 cyl-vs 0.657415769
## 17 cyl-am 0.273118125
## 18 cyl-gear 0.242740085
## 19 cyl-carb 0.277716662
## 20 disp-hp 0.625599666
## 21 disp-drat 0.504403822
## 22 disp-wt 0.788508342
## 23 disp-qsec 0.188093852
## 24 disp-vs 0.504690738
## 25 disp-am 0.349549413
## 26 disp-gear 0.308657134
## 27 disp-carb 0.156006724
## 28 hp-drat 0.201384745
## 29 hp-wt 0.433948779
## 30 hp-qsec 0.501580369
## 31 hp-vs 0.522868892
## 32 hp-am 0.059148311
## 33 hp-gear 0.015801561
## 34 hp-carb 0.562218742
## 35 drat-wt 0.507571675
## 36 drat-qsec 0.008318308
## 37 drat-vs 0.193845127
## 38 drat-am 0.507957151
## 39 drat-gear 0.489454337
## 40 drat-carb 0.008242788
## 41 wt-qsec 0.030525638
## 42 wt-vs 0.307931409
## 43 wt-am 0.479549684
## 44 wt-gear 0.340223720
## 45 wt-carb 0.182846838
## 46 qsec-vs 0.554333027
## 47 qsec-am 0.052836016
## 48 qsec-gear 0.045233731
## 49 qsec-carb 0.430663050
## 50 vs-am 0.028340081
## 51 vs-gear 0.042445620
## 52 vs-carb 0.324452295
## 53 am-gear 0.630529315
## 54 am-carb 0.003310202
## 55 gear-carb 0.075115920
##
## [[3]][[3]]
## Columns Correlation
## 1 mpg-cyl -0.8521620
## 2 mpg-disp -0.8475514
## 3 mpg-hp -0.7761684
## 4 mpg-drat 0.6811719
## 5 mpg-wt -0.8676594
## 6 mpg-qsec 0.4186840
## 7 mpg-vs 0.6640389
## 8 mpg-am 0.5998324
## 9 mpg-gear 0.4802848
## 10 mpg-carb -0.5509251
## 11 cyl-disp 0.9020329
## 12 cyl-hp 0.8324475
## 13 cyl-drat -0.6999381
## 14 cyl-wt 0.7824958
## 15 cyl-qsec -0.5912421
## 16 cyl-vs -0.8108118
## 17 cyl-am -0.5226070
## 18 cyl-gear -0.4926866
## 19 cyl-carb 0.5269883
## 20 disp-hp 0.7909486
## 21 disp-drat -0.7102139
## 22 disp-wt 0.8879799
## 23 disp-qsec -0.4336979
## 24 disp-vs -0.7104159
## 25 disp-am -0.5912270
## 26 disp-gear -0.5555692
## 27 disp-carb 0.3949769
## 28 hp-drat -0.4487591
## 29 hp-wt 0.6587479
## 30 hp-qsec -0.7082234
## 31 hp-vs -0.7230967
## 32 hp-carb 0.7498125
## 33 drat-wt -0.7124406
## 34 drat-vs 0.4402785
## 35 drat-am 0.7127111
## 36 drat-gear 0.6996101
## 37 wt-vs -0.5549157
## 38 wt-am -0.6924953
## 39 wt-gear -0.5832870
## 40 wt-carb 0.4276059
## 41 qsec-vs 0.7445354
## 42 qsec-carb -0.6562492
## 43 vs-carb -0.5696071
## 44 am-gear 0.7940588
## 45 gear-carb 0.2740728